import pandas as pd
# Parse every HTML table on the Basketball-Reference per-game page;
# header=0 takes the first table row as the column names.
url = 'https://www.basketball-reference.com/leagues/NBA_2021_per_game.html'
html = pd.read_html(url, header=0)
# NOTE: despite the "2019" in the name, the URL above is the NBA_2021 page.
df2019 = html[0]
# Keep only real player rows: rows whose Age cell is the literal string
# 'Age' (presumably header rows repeated inside the table body) are removed.
raw = df2019[df2019['Age'] != 'Age']
raw
| Rk | Player | Pos | Age | Tm | G | GS | MP | FG | FGA | ... | FT% | ORB | DRB | TRB | AST | STL | BLK | TOV | PF | PTS | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1 | Precious Achiuwa | PF | 21 | MIA | 61 | 4 | 12.1 | 2.0 | 3.7 | ... | .509 | 1.2 | 2.2 | 3.4 | 0.5 | 0.3 | 0.5 | 0.7 | 1.5 | 5.0 |
| 1 | 2 | Jaylen Adams | PG | 24 | MIL | 7 | 0 | 2.6 | 0.1 | 1.1 | ... | NaN | 0.0 | 0.4 | 0.4 | 0.3 | 0.0 | 0.0 | 0.0 | 0.1 | 0.3 |
| 2 | 3 | Steven Adams | C | 27 | NOP | 58 | 58 | 27.7 | 3.3 | 5.3 | ... | .444 | 3.7 | 5.2 | 8.9 | 1.9 | 0.9 | 0.7 | 1.3 | 1.9 | 7.6 |
| 3 | 4 | Bam Adebayo | C | 23 | MIA | 64 | 64 | 33.5 | 7.1 | 12.5 | ... | .799 | 2.2 | 6.7 | 9.0 | 5.4 | 1.2 | 1.0 | 2.6 | 2.3 | 18.7 |
| 4 | 5 | LaMarcus Aldridge | C | 35 | TOT | 26 | 23 | 25.9 | 5.4 | 11.4 | ... | .872 | 0.7 | 3.8 | 4.5 | 1.9 | 0.4 | 1.1 | 1.0 | 1.8 | 13.5 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 726 | 536 | Delon Wright | PG | 28 | SAC | 27 | 8 | 25.8 | 3.9 | 8.3 | ... | .833 | 1.0 | 2.9 | 3.9 | 3.6 | 1.6 | 0.4 | 1.3 | 1.1 | 10.0 |
| 727 | 537 | Thaddeus Young | PF | 32 | CHI | 68 | 23 | 24.3 | 5.4 | 9.7 | ... | .628 | 2.5 | 3.8 | 6.2 | 4.3 | 1.1 | 0.6 | 2.0 | 2.2 | 12.1 |
| 728 | 538 | Trae Young | PG | 22 | ATL | 63 | 63 | 33.7 | 7.7 | 17.7 | ... | .886 | 0.6 | 3.3 | 3.9 | 9.4 | 0.8 | 0.2 | 4.1 | 1.8 | 25.3 |
| 729 | 539 | Cody Zeller | C | 28 | CHO | 48 | 21 | 20.9 | 3.8 | 6.8 | ... | .714 | 2.5 | 4.4 | 6.8 | 1.8 | 0.6 | 0.4 | 1.1 | 2.5 | 9.4 |
| 730 | 540 | Ivica Zubac | C | 23 | LAC | 72 | 33 | 22.3 | 3.6 | 5.5 | ... | .789 | 2.6 | 4.6 | 7.2 | 1.3 | 0.3 | 0.9 | 1.1 | 2.6 | 9.0 |
705 rows × 30 columns
# (rows, columns) of the table after the 'Age' header rows were removed.
raw.shape
(705, 30)
# Preview the first rows of the cleaned table.
raw.head()
| Rk | Player | Pos | Age | Tm | G | GS | MP | FG | FGA | ... | FT% | ORB | DRB | TRB | AST | STL | BLK | TOV | PF | PTS | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1 | Precious Achiuwa | PF | 21 | MIA | 61 | 4 | 12.1 | 2.0 | 3.7 | ... | .509 | 1.2 | 2.2 | 3.4 | 0.5 | 0.3 | 0.5 | 0.7 | 1.5 | 5.0 |
| 1 | 2 | Jaylen Adams | PG | 24 | MIL | 7 | 0 | 2.6 | 0.1 | 1.1 | ... | NaN | 0.0 | 0.4 | 0.4 | 0.3 | 0.0 | 0.0 | 0.0 | 0.1 | 0.3 |
| 2 | 3 | Steven Adams | C | 27 | NOP | 58 | 58 | 27.7 | 3.3 | 5.3 | ... | .444 | 3.7 | 5.2 | 8.9 | 1.9 | 0.9 | 0.7 | 1.3 | 1.9 | 7.6 |
| 3 | 4 | Bam Adebayo | C | 23 | MIA | 64 | 64 | 33.5 | 7.1 | 12.5 | ... | .799 | 2.2 | 6.7 | 9.0 | 5.4 | 1.2 | 1.0 | 2.6 | 2.3 | 18.7 |
| 4 | 5 | LaMarcus Aldridge | C | 35 | TOT | 26 | 23 | 25.9 | 5.4 | 11.4 | ... | .872 | 0.7 | 3.8 | 4.5 | 1.9 | 0.4 | 1.1 | 1.0 | 1.8 | 13.5 |
5 rows × 30 columns
# Number of missing values in each column.
raw.isna().sum()
Rk 0 Player 0 Pos 0 Age 0 Tm 0 G 0 GS 0 MP 0 FG 0 FGA 0 FG% 2 3P 0 3PA 0 3P% 35 2P 0 2PA 0 2P% 6 eFG% 2 FT 0 FTA 0 FT% 29 ORB 0 DRB 0 TRB 0 AST 0 STL 0 BLK 0 TOV 0 PF 0 PTS 0 dtype: int64
# Replace every missing value with 0.  Per the counts printed just above,
# only the percentage columns (FG%, 3P%, 2P%, eFG%, FT%) contain NaN.
df = raw.fillna(value=0)
# Confirm that no missing values remain.
df.isna().sum()
Rk 0 Player 0 Pos 0 Age 0 Tm 0 G 0 GS 0 MP 0 FG 0 FGA 0 FG% 0 3P 0 3PA 0 3P% 0 2P 0 2PA 0 2P% 0 eFG% 0 FT 0 FTA 0 FT% 0 ORB 0 DRB 0 TRB 0 AST 0 STL 0 BLK 0 TOV 0 PF 0 PTS 0 dtype: int64
# Show the frame after missing values were filled with 0.
df
| Rk | Player | Pos | Age | Tm | G | GS | MP | FG | FGA | ... | FT% | ORB | DRB | TRB | AST | STL | BLK | TOV | PF | PTS | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1 | Precious Achiuwa | PF | 21 | MIA | 61 | 4 | 12.1 | 2.0 | 3.7 | ... | .509 | 1.2 | 2.2 | 3.4 | 0.5 | 0.3 | 0.5 | 0.7 | 1.5 | 5.0 |
| 1 | 2 | Jaylen Adams | PG | 24 | MIL | 7 | 0 | 2.6 | 0.1 | 1.1 | ... | 0 | 0.0 | 0.4 | 0.4 | 0.3 | 0.0 | 0.0 | 0.0 | 0.1 | 0.3 |
| 2 | 3 | Steven Adams | C | 27 | NOP | 58 | 58 | 27.7 | 3.3 | 5.3 | ... | .444 | 3.7 | 5.2 | 8.9 | 1.9 | 0.9 | 0.7 | 1.3 | 1.9 | 7.6 |
| 3 | 4 | Bam Adebayo | C | 23 | MIA | 64 | 64 | 33.5 | 7.1 | 12.5 | ... | .799 | 2.2 | 6.7 | 9.0 | 5.4 | 1.2 | 1.0 | 2.6 | 2.3 | 18.7 |
| 4 | 5 | LaMarcus Aldridge | C | 35 | TOT | 26 | 23 | 25.9 | 5.4 | 11.4 | ... | .872 | 0.7 | 3.8 | 4.5 | 1.9 | 0.4 | 1.1 | 1.0 | 1.8 | 13.5 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 726 | 536 | Delon Wright | PG | 28 | SAC | 27 | 8 | 25.8 | 3.9 | 8.3 | ... | .833 | 1.0 | 2.9 | 3.9 | 3.6 | 1.6 | 0.4 | 1.3 | 1.1 | 10.0 |
| 727 | 537 | Thaddeus Young | PF | 32 | CHI | 68 | 23 | 24.3 | 5.4 | 9.7 | ... | .628 | 2.5 | 3.8 | 6.2 | 4.3 | 1.1 | 0.6 | 2.0 | 2.2 | 12.1 |
| 728 | 538 | Trae Young | PG | 22 | ATL | 63 | 63 | 33.7 | 7.7 | 17.7 | ... | .886 | 0.6 | 3.3 | 3.9 | 9.4 | 0.8 | 0.2 | 4.1 | 1.8 | 25.3 |
| 729 | 539 | Cody Zeller | C | 28 | CHO | 48 | 21 | 20.9 | 3.8 | 6.8 | ... | .714 | 2.5 | 4.4 | 6.8 | 1.8 | 0.6 | 0.4 | 1.1 | 2.5 | 9.4 |
| 730 | 540 | Ivica Zubac | C | 23 | LAC | 72 | 33 | 22.3 | 3.6 | 5.5 | ... | .789 | 2.6 | 4.6 | 7.2 | 1.3 | 0.3 | 0.9 | 1.1 | 2.6 | 9.0 |
705 rows × 30 columns
# Remove the 'Rk' column; the remaining 29 columns are kept.
df = df.drop(columns=['Rk'])
df
| Player | Pos | Age | Tm | G | GS | MP | FG | FGA | FG% | ... | FT% | ORB | DRB | TRB | AST | STL | BLK | TOV | PF | PTS | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Precious Achiuwa | PF | 21 | MIA | 61 | 4 | 12.1 | 2.0 | 3.7 | .544 | ... | .509 | 1.2 | 2.2 | 3.4 | 0.5 | 0.3 | 0.5 | 0.7 | 1.5 | 5.0 |
| 1 | Jaylen Adams | PG | 24 | MIL | 7 | 0 | 2.6 | 0.1 | 1.1 | .125 | ... | 0 | 0.0 | 0.4 | 0.4 | 0.3 | 0.0 | 0.0 | 0.0 | 0.1 | 0.3 |
| 2 | Steven Adams | C | 27 | NOP | 58 | 58 | 27.7 | 3.3 | 5.3 | .614 | ... | .444 | 3.7 | 5.2 | 8.9 | 1.9 | 0.9 | 0.7 | 1.3 | 1.9 | 7.6 |
| 3 | Bam Adebayo | C | 23 | MIA | 64 | 64 | 33.5 | 7.1 | 12.5 | .570 | ... | .799 | 2.2 | 6.7 | 9.0 | 5.4 | 1.2 | 1.0 | 2.6 | 2.3 | 18.7 |
| 4 | LaMarcus Aldridge | C | 35 | TOT | 26 | 23 | 25.9 | 5.4 | 11.4 | .473 | ... | .872 | 0.7 | 3.8 | 4.5 | 1.9 | 0.4 | 1.1 | 1.0 | 1.8 | 13.5 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 726 | Delon Wright | PG | 28 | SAC | 27 | 8 | 25.8 | 3.9 | 8.3 | .462 | ... | .833 | 1.0 | 2.9 | 3.9 | 3.6 | 1.6 | 0.4 | 1.3 | 1.1 | 10.0 |
| 727 | Thaddeus Young | PF | 32 | CHI | 68 | 23 | 24.3 | 5.4 | 9.7 | .559 | ... | .628 | 2.5 | 3.8 | 6.2 | 4.3 | 1.1 | 0.6 | 2.0 | 2.2 | 12.1 |
| 728 | Trae Young | PG | 22 | ATL | 63 | 63 | 33.7 | 7.7 | 17.7 | .438 | ... | .886 | 0.6 | 3.3 | 3.9 | 9.4 | 0.8 | 0.2 | 4.1 | 1.8 | 25.3 |
| 729 | Cody Zeller | C | 28 | CHO | 48 | 21 | 20.9 | 3.8 | 6.8 | .559 | ... | .714 | 2.5 | 4.4 | 6.8 | 1.8 | 0.6 | 0.4 | 1.1 | 2.5 | 9.4 |
| 730 | Ivica Zubac | C | 23 | LAC | 72 | 33 | 22.3 | 3.6 | 5.5 | .652 | ... | .789 | 2.6 | 4.6 | 7.2 | 1.3 | 0.3 | 0.9 | 1.1 | 2.6 | 9.0 |
705 rows × 29 columns
# Round-trip through CSV.  index=False leaves the row labels out of the file;
# the frame read back shows a fresh 0..704 index and the percentage columns
# parsed as floats (0.544 instead of the string '.544').
df.to_csv("nba2021.csv",index=False)
df=pd.read_csv("nba2021.csv")
df
| Player | Pos | Age | Tm | G | GS | MP | FG | FGA | FG% | ... | FT% | ORB | DRB | TRB | AST | STL | BLK | TOV | PF | PTS | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Precious Achiuwa | PF | 21 | MIA | 61 | 4 | 12.1 | 2.0 | 3.7 | 0.544 | ... | 0.509 | 1.2 | 2.2 | 3.4 | 0.5 | 0.3 | 0.5 | 0.7 | 1.5 | 5.0 |
| 1 | Jaylen Adams | PG | 24 | MIL | 7 | 0 | 2.6 | 0.1 | 1.1 | 0.125 | ... | 0.000 | 0.0 | 0.4 | 0.4 | 0.3 | 0.0 | 0.0 | 0.0 | 0.1 | 0.3 |
| 2 | Steven Adams | C | 27 | NOP | 58 | 58 | 27.7 | 3.3 | 5.3 | 0.614 | ... | 0.444 | 3.7 | 5.2 | 8.9 | 1.9 | 0.9 | 0.7 | 1.3 | 1.9 | 7.6 |
| 3 | Bam Adebayo | C | 23 | MIA | 64 | 64 | 33.5 | 7.1 | 12.5 | 0.570 | ... | 0.799 | 2.2 | 6.7 | 9.0 | 5.4 | 1.2 | 1.0 | 2.6 | 2.3 | 18.7 |
| 4 | LaMarcus Aldridge | C | 35 | TOT | 26 | 23 | 25.9 | 5.4 | 11.4 | 0.473 | ... | 0.872 | 0.7 | 3.8 | 4.5 | 1.9 | 0.4 | 1.1 | 1.0 | 1.8 | 13.5 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 700 | Delon Wright | PG | 28 | SAC | 27 | 8 | 25.8 | 3.9 | 8.3 | 0.462 | ... | 0.833 | 1.0 | 2.9 | 3.9 | 3.6 | 1.6 | 0.4 | 1.3 | 1.1 | 10.0 |
| 701 | Thaddeus Young | PF | 32 | CHI | 68 | 23 | 24.3 | 5.4 | 9.7 | 0.559 | ... | 0.628 | 2.5 | 3.8 | 6.2 | 4.3 | 1.1 | 0.6 | 2.0 | 2.2 | 12.1 |
| 702 | Trae Young | PG | 22 | ATL | 63 | 63 | 33.7 | 7.7 | 17.7 | 0.438 | ... | 0.886 | 0.6 | 3.3 | 3.9 | 9.4 | 0.8 | 0.2 | 4.1 | 1.8 | 25.3 |
| 703 | Cody Zeller | C | 28 | CHO | 48 | 21 | 20.9 | 3.8 | 6.8 | 0.559 | ... | 0.714 | 2.5 | 4.4 | 6.8 | 1.8 | 0.6 | 0.4 | 1.1 | 2.5 | 9.4 |
| 704 | Ivica Zubac | C | 23 | LAC | 72 | 33 | 22.3 | 3.6 | 5.5 | 0.652 | ... | 0.789 | 2.6 | 4.6 | 7.2 | 1.3 | 0.3 | 0.9 | 1.1 | 2.6 | 9.0 |
705 rows × 29 columns
# Show at most 10 rows when displaying frames/series from here on.
pd.options.display.max_rows = 10
# Column dtypes after the CSV reload.
df.dtypes
Player object
Pos object
Age int64
Tm object
G int64
...
STL float64
BLK float64
TOV float64
PF float64
PTS float64
Length: 29, dtype: object
# Keep only the numeric columns.  The stray trailing backslash (a line
# continuation with nothing after it) was removed: it made this cell fail
# with "SyntaxError: unexpected EOF while parsing".
df.select_dtypes(include=['number'])
Input In [72] df.select_dtypes(include=['number'])\ ^ SyntaxError: unexpected EOF while parsing
# Keep only the object-dtype (text) columns: Player, Pos and Tm.
df.select_dtypes(include='object')
| Player | Pos | Tm | |
|---|---|---|---|
| 0 | Precious Achiuwa | PF | MIA |
| 1 | Jaylen Adams | PG | MIL |
| 2 | Steven Adams | C | NOP |
| 3 | Bam Adebayo | C | MIA |
| 4 | LaMarcus Aldridge | C | TOT |
| ... | ... | ... | ... |
| 700 | Delon Wright | PG | SAC |
| 701 | Thaddeus Young | PF | CHI |
| 702 | Trae Young | PG | ATL |
| 703 | Cody Zeller | C | CHO |
| 704 | Ivica Zubac | C | LAC |
705 rows × 3 columns
# Row(s) whose PTS value equals the column maximum.
playerpoint = df.loc[df['PTS'].eq(df['PTS'].max())]
playerpoint
| Player | Pos | Age | Tm | G | GS | MP | FG | FGA | FG% | ... | FT% | ORB | DRB | TRB | AST | STL | BLK | TOV | PF | PTS | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 151 | Stephen Curry | PG | 32 | GSW | 63 | 63 | 34.2 | 10.4 | 21.7 | 0.482 | ... | 0.916 | 0.5 | 5.0 | 5.5 | 5.8 | 1.2 | 0.1 | 3.4 | 1.9 | 32.0 |
1 rows × 29 columns
# Team of the top-PTS row(s).
playerpoint['Tm']
151 GSW Name: Tm, dtype: object
# Position of the top-PTS row(s).
playerpoint['Pos']
151 PG Name: Pos, dtype: object
# Value of the G column for the top-PTS row(s).
playerpoint['G']
151 63 Name: G, dtype: int64
# Rows with a PTS value strictly greater than 20.
df.query('PTS > 20')
| Player | Pos | Age | Tm | G | GS | MP | FG | FGA | FG% | ... | FT% | ORB | DRB | TRB | AST | STL | BLK | TOV | PF | PTS | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 17 | Giannis Antetokounmpo | PF | 26 | MIL | 61 | 61 | 33.0 | 10.3 | 18.0 | 0.569 | ... | 0.685 | 1.6 | 9.4 | 11.0 | 5.9 | 1.2 | 1.2 | 3.4 | 2.8 | 28.1 |
| 45 | Bradley Beal | SG | 27 | WAS | 60 | 60 | 35.8 | 11.2 | 23.0 | 0.485 | ... | 0.889 | 1.2 | 3.5 | 4.7 | 4.4 | 1.2 | 0.4 | 3.1 | 2.3 | 31.3 |
| 71 | Devin Booker | SG | 24 | PHO | 67 | 67 | 33.9 | 9.3 | 19.2 | 0.484 | ... | 0.867 | 0.5 | 3.7 | 4.2 | 4.3 | 0.8 | 0.2 | 3.1 | 2.7 | 25.6 |
| 89 | Malcolm Brogdon | PG | 28 | IND | 56 | 56 | 34.5 | 7.9 | 17.5 | 0.453 | ... | 0.864 | 1.0 | 4.2 | 5.3 | 5.9 | 0.9 | 0.3 | 2.1 | 2.0 | 21.2 |
| 94 | Jaylen Brown | SG | 24 | BOS | 58 | 58 | 34.5 | 9.3 | 19.2 | 0.484 | ... | 0.764 | 1.2 | 4.8 | 6.0 | 3.4 | 1.2 | 0.6 | 2.7 | 2.9 | 24.7 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 665 | John Wall | PG | 30 | HOU | 40 | 40 | 32.2 | 7.3 | 18.2 | 0.404 | ... | 0.749 | 0.4 | 2.8 | 3.2 | 6.9 | 1.1 | 0.8 | 3.5 | 1.2 | 20.6 |
| 675 | Russell Westbrook | PG | 32 | WAS | 65 | 65 | 36.4 | 8.4 | 19.0 | 0.439 | ... | 0.656 | 1.7 | 9.9 | 11.5 | 11.7 | 1.4 | 0.4 | 4.8 | 2.9 | 22.2 |
| 688 | Zion Williamson | PF | 20 | NOP | 61 | 61 | 33.2 | 10.4 | 17.0 | 0.611 | ... | 0.698 | 2.7 | 4.5 | 7.2 | 3.7 | 0.9 | 0.6 | 2.7 | 2.2 | 27.0 |
| 696 | Christian Wood | C | 25 | HOU | 41 | 41 | 32.3 | 8.0 | 15.6 | 0.514 | ... | 0.631 | 1.9 | 7.8 | 9.6 | 1.7 | 0.8 | 1.2 | 2.0 | 2.1 | 21.0 |
| 702 | Trae Young | PG | 22 | ATL | 63 | 63 | 33.7 | 7.7 | 17.7 | 0.438 | ... | 0.886 | 0.6 | 3.3 | 3.9 | 9.4 | 0.8 | 0.2 | 4.1 | 1.8 | 25.3 |
49 rows × 29 columns
# Row(s) holding the maximum value of the '3P' column.
df.loc[df['3P'].eq(df['3P'].max())]
| Player | Pos | Age | Tm | G | GS | MP | FG | FGA | FG% | ... | FT% | ORB | DRB | TRB | AST | STL | BLK | TOV | PF | PTS | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 151 | Stephen Curry | PG | 32 | GSW | 63 | 63 | 34.2 | 10.4 | 21.7 | 0.482 | ... | 0.916 | 0.5 | 5.0 | 5.5 | 5.8 | 1.2 | 0.1 | 3.4 | 1.9 | 32.0 |
1 rows × 29 columns
# Row(s) holding the maximum value of the 'AST' column.
df.loc[df['AST'].eq(df['AST'].max())]
| Player | Pos | Age | Tm | G | GS | MP | FG | FGA | FG% | ... | FT% | ORB | DRB | TRB | AST | STL | BLK | TOV | PF | PTS | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 675 | Russell Westbrook | PG | 32 | WAS | 65 | 65 | 36.4 | 8.4 | 19.0 | 0.439 | ... | 0.656 | 1.7 | 9.9 | 11.5 | 11.7 | 1.4 | 0.4 | 4.8 | 2.9 | 22.2 |
1 rows × 29 columns
# Rows of the 'LAL' team (get_group raises KeyError if that label is absent).
LAL = df.groupby('Tm').get_group('LAL')
# Row(s) of that team with the highest PTS value.
LAL.loc[LAL['PTS'].eq(LAL['PTS'].max())]
| Player | Pos | Age | Tm | G | GS | MP | FG | FGA | FG% | ... | FT% | ORB | DRB | TRB | AST | STL | BLK | TOV | PF | PTS | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 329 | LeBron James | PG | 36 | LAL | 45 | 45 | 33.4 | 9.4 | 18.3 | 0.513 | ... | 0.698 | 0.6 | 7.0 | 7.7 | 7.8 | 1.1 | 0.6 | 3.7 | 1.6 | 25.0 |
1 rows × 29 columns
# Summary statistics of PTS for every position label (hybrids included).
df.groupby('Pos')['PTS'].describe()
| count | mean | std | min | 25% | 50% | 75% | max | |
|---|---|---|---|---|---|---|---|---|
| Pos | ||||||||
| C | 138.0 | 8.451449 | 5.648205 | 0.0 | 4.775 | 7.55 | 11.200 | 28.5 |
| C-PF | 2.0 | 8.450000 | 7.141778 | 3.4 | 5.925 | 8.45 | 10.975 | 13.5 |
| PF | 143.0 | 7.484615 | 5.924184 | 0.0 | 3.150 | 6.00 | 10.050 | 28.1 |
| PF-C | 1.0 | 7.000000 | NaN | 7.0 | 7.000 | 7.00 | 7.000 | 7.0 |
| PF-SF | 2.0 | 4.150000 | 1.484924 | 3.1 | 3.625 | 4.15 | 4.675 | 5.2 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... |
| SF-PF | 3.0 | 5.366667 | 4.119871 | 1.5 | 3.200 | 4.90 | 7.300 | 9.7 |
| SF-SG | 3.0 | 9.733333 | 6.833984 | 3.6 | 6.050 | 8.50 | 12.800 | 17.1 |
| SG | 162.0 | 9.485185 | 6.427515 | 0.1 | 4.400 | 8.25 | 12.425 | 31.3 |
| SG-PG | 2.0 | 8.550000 | 2.333452 | 6.9 | 7.725 | 8.55 | 9.375 | 10.2 |
| SG-SF | 2.0 | 15.100000 | 4.949747 | 11.6 | 13.350 | 15.10 | 16.850 | 18.6 |
13 rows × 8 columns
# Restrict to the five single-position labels; rows with hybrid labels
# such as 'C-PF' are left out.
positions = ['C', 'PF', 'SG', 'SF', 'PG']
POS = df.loc[df['Pos'].isin(positions)]
POS
| Player | Pos | Age | Tm | G | GS | MP | FG | FGA | FG% | ... | FT% | ORB | DRB | TRB | AST | STL | BLK | TOV | PF | PTS | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Precious Achiuwa | PF | 21 | MIA | 61 | 4 | 12.1 | 2.0 | 3.7 | 0.544 | ... | 0.509 | 1.2 | 2.2 | 3.4 | 0.5 | 0.3 | 0.5 | 0.7 | 1.5 | 5.0 |
| 1 | Jaylen Adams | PG | 24 | MIL | 7 | 0 | 2.6 | 0.1 | 1.1 | 0.125 | ... | 0.000 | 0.0 | 0.4 | 0.4 | 0.3 | 0.0 | 0.0 | 0.0 | 0.1 | 0.3 |
| 2 | Steven Adams | C | 27 | NOP | 58 | 58 | 27.7 | 3.3 | 5.3 | 0.614 | ... | 0.444 | 3.7 | 5.2 | 8.9 | 1.9 | 0.9 | 0.7 | 1.3 | 1.9 | 7.6 |
| 3 | Bam Adebayo | C | 23 | MIA | 64 | 64 | 33.5 | 7.1 | 12.5 | 0.570 | ... | 0.799 | 2.2 | 6.7 | 9.0 | 5.4 | 1.2 | 1.0 | 2.6 | 2.3 | 18.7 |
| 4 | LaMarcus Aldridge | C | 35 | TOT | 26 | 23 | 25.9 | 5.4 | 11.4 | 0.473 | ... | 0.872 | 0.7 | 3.8 | 4.5 | 1.9 | 0.4 | 1.1 | 1.0 | 1.8 | 13.5 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 700 | Delon Wright | PG | 28 | SAC | 27 | 8 | 25.8 | 3.9 | 8.3 | 0.462 | ... | 0.833 | 1.0 | 2.9 | 3.9 | 3.6 | 1.6 | 0.4 | 1.3 | 1.1 | 10.0 |
| 701 | Thaddeus Young | PF | 32 | CHI | 68 | 23 | 24.3 | 5.4 | 9.7 | 0.559 | ... | 0.628 | 2.5 | 3.8 | 6.2 | 4.3 | 1.1 | 0.6 | 2.0 | 2.2 | 12.1 |
| 702 | Trae Young | PG | 22 | ATL | 63 | 63 | 33.7 | 7.7 | 17.7 | 0.438 | ... | 0.886 | 0.6 | 3.3 | 3.9 | 9.4 | 0.8 | 0.2 | 4.1 | 1.8 | 25.3 |
| 703 | Cody Zeller | C | 28 | CHO | 48 | 21 | 20.9 | 3.8 | 6.8 | 0.559 | ... | 0.714 | 2.5 | 4.4 | 6.8 | 1.8 | 0.6 | 0.4 | 1.1 | 2.5 | 9.4 |
| 704 | Ivica Zubac | C | 23 | LAC | 72 | 33 | 22.3 | 3.6 | 5.5 | 0.652 | ... | 0.789 | 2.6 | 4.6 | 7.2 | 1.3 | 0.3 | 0.9 | 1.1 | 2.6 | 9.0 |
689 rows × 29 columns
# Summary statistics of PTS for the five single-position labels only.
POS.groupby('Pos')['PTS'].describe()
| count | mean | std | min | 25% | 50% | 75% | max | |
|---|---|---|---|---|---|---|---|---|
| Pos | ||||||||
| C | 138.0 | 8.451449 | 5.648205 | 0.0 | 4.775 | 7.55 | 11.200 | 28.5 |
| PF | 143.0 | 7.484615 | 5.924184 | 0.0 | 3.150 | 6.00 | 10.050 | 28.1 |
| PG | 127.0 | 9.625984 | 7.062737 | 0.0 | 4.450 | 7.50 | 13.250 | 32.0 |
| SF | 119.0 | 7.811765 | 6.013081 | 0.0 | 3.900 | 6.10 | 10.900 | 26.4 |
| SG | 162.0 | 9.485185 | 6.427515 | 0.1 | 4.400 | 8.25 | 12.425 | 31.3 |
# Two-column frame (position, points) limited to the five single-position
# labels, selected in one step.
positions = ['C', 'PF', 'SF', 'PG', 'SG']
PTS = df.loc[df['Pos'].isin(positions), ['Pos', 'PTS']]
PTS
| Pos | PTS | |
|---|---|---|
| 0 | PF | 5.0 |
| 1 | PG | 0.3 |
| 2 | C | 7.6 |
| 3 | C | 18.7 |
| 4 | C | 13.5 |
| ... | ... | ... |
| 700 | PG | 10.0 |
| 701 | PF | 12.1 |
| 702 | PG | 25.3 |
| 703 | C | 9.4 |
| 704 | C | 9.0 |
689 rows × 2 columns
# One PTS histogram per position, using the default subplot grid.
PTS['PTS'].hist(by=PTS['Pos'])
array([[<AxesSubplot:title={'center':'C'}>,
<AxesSubplot:title={'center':'PF'}>],
[<AxesSubplot:title={'center':'PG'}>,
<AxesSubplot:title={'center':'SF'}>],
[<AxesSubplot:title={'center':'SG'}>, <AxesSubplot:>]],
dtype=object)
# Same histograms, laid out as a single row of five panels.
PTS['PTS'].hist(by=PTS['Pos'], layout=(1,5))
array([<AxesSubplot:title={'center':'C'}>,
<AxesSubplot:title={'center':'PF'}>,
<AxesSubplot:title={'center':'PG'}>,
<AxesSubplot:title={'center':'SF'}>,
<AxesSubplot:title={'center':'SG'}>], dtype=object)
# Single row of five panels again, with an explicit figure size of (16, 2).
PTS['PTS'].hist(by=PTS['Pos'], layout=(1,5), figsize=(16,2))
array([<AxesSubplot:title={'center':'C'}>,
<AxesSubplot:title={'center':'PF'}>,
<AxesSubplot:title={'center':'PG'}>,
<AxesSubplot:title={'center':'SF'}>,
<AxesSubplot:title={'center':'SG'}>], dtype=object)
import seaborn as sns
import matplotlib.pyplot as plt
# One facet (column) per position, each showing a histogram of PTS.
g = sns.FacetGrid(data=PTS, col="Pos")
g.map(plt.hist, "PTS");
# pandas box plot of PTS, one box per position.
PTS.boxplot(column='PTS',by='Pos')
<AxesSubplot:title={'center':'PTS'}, xlabel='Pos'>
import seaborn as sns
# seaborn box plot of PTS, one box per position.
sns.boxplot(data=PTS, x='Pos', y='PTS')
<AxesSubplot:xlabel='Pos', ylabel='PTS'>
# Box plot per position with the individual observations drawn on top as
# jittered, semi-transparent black dots.
sns.boxplot(data=PTS, x='Pos', y='PTS')
sns.stripplot(
    data=PTS,
    x='Pos',
    y='PTS',
    jitter=True,
    marker='o',
    alpha=0.8,
    color="black",
)
<AxesSubplot:xlabel='Pos', ylabel='PTS'>
# Pairwise correlations between the numeric columns only.  The text columns
# (Player, Pos, Tm) are excluded explicitly: pandas >= 2.0 no longer drops
# non-numeric columns silently, so a bare df.corr() raises on them.
# Selecting by dtype works on old and new pandas alike.
corr = df.select_dtypes(include='number').corr()
corr
| Age | G | GS | MP | FG | FGA | FG% | 3P | 3PA | 3P% | ... | FT% | ORB | DRB | TRB | AST | STL | BLK | TOV | PF | PTS | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Age | 1.000000 | -0.345517 | -0.283681 | -0.192756 | -0.262490 | -0.172875 | -0.370059 | 0.059354 | 0.053957 | 0.056855 | ... | -0.034809 | -0.355144 | -0.325032 | -0.354690 | -0.000381 | -0.096071 | -0.349642 | -0.162726 | -0.347479 | -0.225235 |
| G | -0.345517 | 1.000000 | 0.648273 | 0.470698 | 0.365466 | 0.360958 | -0.077819 | 0.355054 | 0.341830 | 0.202187 | ... | 0.194120 | 0.016228 | 0.235345 | 0.172882 | 0.269564 | 0.384014 | -0.085464 | 0.250864 | 0.278208 | 0.358279 |
| GS | -0.283681 | 0.648273 | 1.000000 | 0.904311 | 0.858556 | 0.835419 | -0.316770 | 0.512978 | 0.545819 | -0.054649 | ... | -0.034834 | 0.233035 | 0.709844 | 0.584480 | 0.729174 | 0.764005 | 0.166020 | 0.806211 | 0.664151 | 0.844115 |
| MP | -0.192756 | 0.470698 | 0.904311 | 1.000000 | 0.949435 | 0.961235 | -0.447310 | 0.686779 | 0.725822 | 0.092808 | ... | 0.119708 | 0.081652 | 0.671941 | 0.504309 | 0.870400 | 0.897641 | 0.038798 | 0.909048 | 0.674765 | 0.951051 |
| FG | -0.262490 | 0.365466 | 0.858556 | 0.949435 | 1.000000 | 0.979139 | -0.321795 | 0.608308 | 0.647975 | 0.023557 | ... | 0.069167 | 0.141489 | 0.695303 | 0.541744 | 0.867253 | 0.815677 | 0.083778 | 0.940158 | 0.617646 | 0.993363 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| STL | -0.096071 | 0.384014 | 0.764005 | 0.897641 | 0.815677 | 0.849021 | -0.469837 | 0.619763 | 0.661657 | 0.111419 | ... | 0.168319 | -0.030743 | 0.504083 | 0.346050 | 0.875917 | 1.000000 | -0.064736 | 0.834164 | 0.554084 | 0.823366 |
| BLK | -0.349642 | -0.085464 | 0.166020 | 0.038798 | 0.083778 | -0.075097 | 0.479992 | -0.595553 | -0.559639 | -0.791483 | ... | -0.605296 | 0.916050 | 0.675897 | 0.799923 | -0.228683 | -0.064736 | 1.000000 | 0.068952 | 0.634553 | -0.006807 |
| TOV | -0.162726 | 0.250864 | 0.806211 | 0.909048 | 0.940158 | 0.937102 | -0.434901 | 0.549428 | 0.601504 | -0.036053 | ... | 0.019052 | 0.117118 | 0.669709 | 0.515018 | 0.934232 | 0.834164 | 0.068952 | 1.000000 | 0.616205 | 0.942888 |
| PF | -0.347479 | 0.278208 | 0.664151 | 0.674765 | 0.617646 | 0.532462 | 0.023450 | 0.048993 | 0.099438 | -0.404959 | ... | -0.263962 | 0.640282 | 0.853605 | 0.828871 | 0.386731 | 0.554084 | 0.634553 | 0.616205 | 1.000000 | 0.560818 |
| PTS | -0.225235 | 0.358279 | 0.844115 | 0.951051 | 0.993363 | 0.992835 | -0.398770 | 0.679562 | 0.717078 | 0.099527 | ... | 0.133879 | 0.041110 | 0.628774 | 0.459399 | 0.896770 | 0.823366 | -0.006807 | 0.942888 | 0.560818 | 1.000000 |
26 rows × 26 columns
# Heat map of the correlation matrix with seaborn's default settings.
sns.heatmap(corr)
<AxesSubplot:>
import seaborn as sns
import matplotlib.pyplot as plt
# Draw on an explicitly sized figure; square=True makes every cell square.
fig, ax = plt.subplots(figsize=(7, 5))
sns.heatmap(corr, square=True, ax=ax)
<AxesSubplot:>
# https://seaborn.pydata.org/generated/seaborn.heatmap.html
import numpy as np
import seaborn as sns
# Boolean mask that is True on and above the diagonal.  seaborn does not
# draw cells where the mask is True, so only the lower triangle is shown.
mask = np.triu(np.ones_like(corr, dtype=bool))
# The figure is created and drawn inside the "white" style context, so both
# statements must be indented under the `with`.
with sns.axes_style("white"):
    f, ax = plt.subplots(figsize=(7, 5))
    ax = sns.heatmap(corr, mask=mask, vmax=1, square=True)
# NOTE(review): the output recorded below is a 26x26 correlation matrix, not
# the player table -- df appears to have been rebound in a cell that is not
# shown here; confirm the execution order before relying on df.
df
| Age | G | GS | MP | FG | FGA | FG% | 3P | 3PA | 3P% | ... | FT% | ORB | DRB | TRB | AST | STL | BLK | TOV | PF | PTS | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Age | 1.000000 | 0.061119 | 0.092335 | 0.198153 | 0.127956 | 0.132420 | 0.048242 | 0.185962 | 0.163591 | 0.113262 | ... | 0.129573 | 0.017896 | 0.139539 | 0.111839 | 0.226687 | 0.178456 | 0.042726 | 0.121069 | 0.111609 | 0.138877 |
| G | 0.061119 | 1.000000 | 0.644453 | 0.553135 | 0.472687 | 0.441459 | 0.296131 | 0.397096 | 0.362864 | 0.297257 | ... | 0.322548 | 0.264688 | 0.404790 | 0.389992 | 0.337747 | 0.409568 | 0.181272 | 0.325467 | 0.392205 | 0.465707 |
| GS | 0.092335 | 0.644453 | 1.000000 | 0.764826 | 0.716464 | 0.692610 | 0.221514 | 0.511321 | 0.499250 | 0.187241 | ... | 0.209549 | 0.370520 | 0.629791 | 0.593427 | 0.550817 | 0.553341 | 0.322566 | 0.591306 | 0.536160 | 0.713721 |
| MP | 0.198153 | 0.553135 | 0.764826 | 1.000000 | 0.879032 | 0.888528 | 0.256452 | 0.693692 | 0.707618 | 0.326397 | ... | 0.373425 | 0.381314 | 0.730310 | 0.672641 | 0.712777 | 0.753880 | 0.358051 | 0.736907 | 0.705754 | 0.879928 |
| FG | 0.127956 | 0.472687 | 0.716464 | 0.879032 | 1.000000 | 0.975263 | 0.325081 | 0.669591 | 0.669400 | 0.287325 | ... | 0.321964 | 0.381583 | 0.724941 | 0.668630 | 0.720105 | 0.652066 | 0.340571 | 0.794838 | 0.595233 | 0.990473 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| STL | 0.178456 | 0.409568 | 0.553341 | 0.753880 | 0.652066 | 0.669474 | 0.177922 | 0.486753 | 0.504022 | 0.242138 | ... | 0.315748 | 0.234928 | 0.502869 | 0.454899 | 0.680935 | 1.000000 | 0.238287 | 0.603082 | 0.531279 | 0.649888 |
| BLK | 0.042726 | 0.181272 | 0.322566 | 0.358051 | 0.340571 | 0.248033 | 0.409630 | -0.034478 | -0.041055 | -0.139143 | ... | 0.001805 | 0.653452 | 0.575798 | 0.639057 | 0.087960 | 0.238287 | 1.000000 | 0.264079 | 0.538745 | 0.303450 |
| TOV | 0.121069 | 0.325467 | 0.591306 | 0.736907 | 0.794838 | 0.802002 | 0.144745 | 0.496285 | 0.523658 | 0.176498 | ... | 0.212927 | 0.288535 | 0.622156 | 0.561929 | 0.822754 | 0.603082 | 0.264079 | 1.000000 | 0.564525 | 0.806638 |
| PF | 0.111609 | 0.392205 | 0.536160 | 0.705754 | 0.595233 | 0.563711 | 0.337232 | 0.322891 | 0.339857 | 0.133237 | ... | 0.225081 | 0.530736 | 0.666796 | 0.669975 | 0.399748 | 0.531279 | 0.538745 | 0.564525 | 1.000000 | 0.576618 |
| PTS | 0.138877 | 0.465707 | 0.713721 | 0.879928 | 0.990473 | 0.980010 | 0.278695 | 0.722054 | 0.722231 | 0.316505 | ... | 0.353171 | 0.320283 | 0.696320 | 0.627926 | 0.740902 | 0.649888 | 0.303450 | 0.806638 | 0.576618 | 1.000000 |
26 rows × 26 columns
# Load the CSV written earlier into a separate variable; the recorded
# output is the 705 x 29 player table.
dataframe=pd.read_csv("nba2021.csv")
dataframe
| Player | Pos | Age | Tm | G | GS | MP | FG | FGA | FG% | ... | FT% | ORB | DRB | TRB | AST | STL | BLK | TOV | PF | PTS | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Precious Achiuwa | PF | 21 | MIA | 61 | 4 | 12.1 | 2.0 | 3.7 | 0.544 | ... | 0.509 | 1.2 | 2.2 | 3.4 | 0.5 | 0.3 | 0.5 | 0.7 | 1.5 | 5.0 |
| 1 | Jaylen Adams | PG | 24 | MIL | 7 | 0 | 2.6 | 0.1 | 1.1 | 0.125 | ... | 0.000 | 0.0 | 0.4 | 0.4 | 0.3 | 0.0 | 0.0 | 0.0 | 0.1 | 0.3 |
| 2 | Steven Adams | C | 27 | NOP | 58 | 58 | 27.7 | 3.3 | 5.3 | 0.614 | ... | 0.444 | 3.7 | 5.2 | 8.9 | 1.9 | 0.9 | 0.7 | 1.3 | 1.9 | 7.6 |
| 3 | Bam Adebayo | C | 23 | MIA | 64 | 64 | 33.5 | 7.1 | 12.5 | 0.570 | ... | 0.799 | 2.2 | 6.7 | 9.0 | 5.4 | 1.2 | 1.0 | 2.6 | 2.3 | 18.7 |
| 4 | LaMarcus Aldridge | C | 35 | TOT | 26 | 23 | 25.9 | 5.4 | 11.4 | 0.473 | ... | 0.872 | 0.7 | 3.8 | 4.5 | 1.9 | 0.4 | 1.1 | 1.0 | 1.8 | 13.5 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 700 | Delon Wright | PG | 28 | SAC | 27 | 8 | 25.8 | 3.9 | 8.3 | 0.462 | ... | 0.833 | 1.0 | 2.9 | 3.9 | 3.6 | 1.6 | 0.4 | 1.3 | 1.1 | 10.0 |
| 701 | Thaddeus Young | PF | 32 | CHI | 68 | 23 | 24.3 | 5.4 | 9.7 | 0.559 | ... | 0.628 | 2.5 | 3.8 | 6.2 | 4.3 | 1.1 | 0.6 | 2.0 | 2.2 | 12.1 |
| 702 | Trae Young | PG | 22 | ATL | 63 | 63 | 33.7 | 7.7 | 17.7 | 0.438 | ... | 0.886 | 0.6 | 3.3 | 3.9 | 9.4 | 0.8 | 0.2 | 4.1 | 1.8 | 25.3 |
| 703 | Cody Zeller | C | 28 | CHO | 48 | 21 | 20.9 | 3.8 | 6.8 | 0.559 | ... | 0.714 | 2.5 | 4.4 | 6.8 | 1.8 | 0.6 | 0.4 | 1.1 | 2.5 | 9.4 |
| 704 | Ivica Zubac | C | 23 | LAC | 72 | 33 | 22.3 | 3.6 | 5.5 | 0.652 | ... | 0.789 | 2.6 | 4.6 | 7.2 | 1.3 | 0.3 | 0.9 | 1.1 | 2.6 | 9.0 |
705 rows × 29 columns
# Numeric columns of the reloaded table.
number = dataframe.select_dtypes(include="number")
# Preview: every row, first five numeric columns.
number.iloc[:, 0:5]
| Age | G | GS | MP | FG | |
|---|---|---|---|---|---|
| 0 | 21 | 61 | 4 | 12.1 | 2.0 |
| 1 | 24 | 7 | 0 | 2.6 | 0.1 |
| 2 | 27 | 58 | 58 | 27.7 | 3.3 |
| 3 | 23 | 64 | 64 | 33.5 | 7.1 |
| 4 | 35 | 26 | 23 | 25.9 | 5.4 |
| ... | ... | ... | ... | ... | ... |
| 700 | 28 | 27 | 8 | 25.8 | 3.9 |
| 701 | 32 | 68 | 23 | 24.3 | 5.4 |
| 702 | 22 | 63 | 63 | 33.7 | 7.7 |
| 703 | 28 | 48 | 21 | 20.9 | 3.8 |
| 704 | 23 | 72 | 33 | 22.3 | 3.6 |
705 rows × 5 columns
# Hand-picked subset of numeric columns for the pair plot.
selections = ['Age', 'G', 'STL', 'BLK', 'AST', 'PTS']
newdf = dataframe.loc[:, selections]
newdf
| Age | G | STL | BLK | AST | PTS | |
|---|---|---|---|---|---|---|
| 0 | 21 | 61 | 0.3 | 0.5 | 0.5 | 5.0 |
| 1 | 24 | 7 | 0.0 | 0.0 | 0.3 | 0.3 |
| 2 | 27 | 58 | 0.9 | 0.7 | 1.9 | 7.6 |
| 3 | 23 | 64 | 1.2 | 1.0 | 5.4 | 18.7 |
| 4 | 35 | 26 | 0.4 | 1.1 | 1.9 | 13.5 |
| ... | ... | ... | ... | ... | ... | ... |
| 700 | 28 | 27 | 1.6 | 0.4 | 3.6 | 10.0 |
| 701 | 32 | 68 | 1.1 | 0.6 | 4.3 | 12.1 |
| 702 | 22 | 63 | 0.8 | 0.2 | 9.4 | 25.3 |
| 703 | 28 | 48 | 0.6 | 0.4 | 1.8 | 9.4 |
| 704 | 23 | 72 | 0.3 | 0.9 | 1.3 | 9.0 |
705 rows × 6 columns
import seaborn as sns
# Scatter plot for every pair of the six selected columns.
g = sns.PairGrid(data=newdf)
g.map(plt.scatter);
import seaborn as sns
# Scatter plot for every pair of numeric columns.  With 26 numeric columns
# this is a 26 x 26 grid of panels, so expect it to be slow to render.
g = sns.PairGrid(data=number)
g.map(plt.scatter)
<seaborn.axisgrid.PairGrid at 0x1f93e280a00>